##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
## Engaged or Obedient?  Racially Differentiated Models of Democratic Education
## Replication File- Word Frequency

# Inputs: white_50.RData, non_white_50.RData, white_70.RData, non_white_70.RData
# Outputs: Data for Table B.1, Table B.2. Produces Figure 2, Figure B.1
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##


# Run this script in a fresh R session. The former `rm(list = ls())` was
# removed: it only deletes global objects (attached packages and options are
# left as they were) and it wipes the workspace of anyone who source()s this
# file. Every object used below is (re)loaded from the .RData inputs.

library(readr)
library(quanteda)
library(quanteda.corpora)
library(ggcorrplot)
library(ggplot2)
library(dplyr)
library(lubridate)
library(stats)
library(patchwork)
library(tm)

# Set `replication_dir` to the location of the replication files, or leave it
# unchanged and start R from inside the replication folder.
replication_dir <-
  "~/Desktop/TAD Handbooks Project/engaged_obedient_replication"

# Only change directory when the configured folder exists, so the script does
# not abort on machines where the path above is absent.
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
}

# All paths below are relative to the replication folder; fail early with a
# clear message rather than with a later "cannot open file" error from load().
if (!dir.exists("data")) {
  stop(
    "No 'data' folder found in ", getwd(),
    ". Set `replication_dir` to the replication folder.",
    call. = FALSE
  )
}

# ---- Helper ----

# Draw a horizontal bar chart of feature frequencies.
#
# Args:
#   freq:  frequency table returned by textstat_frequency(); the columns
#          `feature`, `frequency` and `group` are used.
#   title: plot title.
# Returns: a ggplot object.
#
# The bar position is stored as a column of the plotted data. aes() is only
# evaluated when a plot is rendered, so the earlier
# `aes(x = nrow(tstat_freq):1)` looked up whatever the global `tstat_freq`
# held at print time -- i.e. the table of the *second* plot -- and only
# worked while both tables had the same number of rows.
plot_top_features <- function(freq, title) {
  # First row gets the highest position so that it ends up at the top of the
  # chart after coord_flip().
  freq$position <- rev(seq_len(nrow(freq)))

  ggplot(data = freq, aes(x = position, y = frequency)) +
    geom_bar(stat = "identity") +
    facet_wrap(~ group, scales = "free") +
    coord_flip() +
    scale_x_continuous(breaks = freq$position, labels = freq$feature) +
    labs(title = title, x = NULL, y = "Frequency")
}

# Names of the dfm objects that each pair of .RData inputs must define.
dfm_names <- c("white_dfm_trim", "non_white_dfm_trim")

# Figures are written to the "figures" folder in the working directory.
dir.create("figures", showWarnings = FALSE)

# NOTE(review): textstat_frequency() moved to the quanteda.textstats package
# in quanteda >= 3.0 -- confirm which quanteda version this replication
# targets.


# ---- Word Frequency by Simple Majority of White or Non-White Students (50%)

# load() returns the names of the objects it restored; use them to confirm
# that both dfm objects really came from these files.
loaded_50 <- c(load("data/white_50.RData"), load("data/non_white_50.RData"))
stopifnot(all(dfm_names %in% loaded_50))

### Raw Data for Table B.1
# View top 30 features of each dfm. print() is explicit so the output also
# appears when the script is run through source().
print(topfeatures(white_dfm_trim, 30))
print(topfeatures(non_white_dfm_trim, 30))

### Figure 2
# Remove words in common to isolate 15 unique features
white_dfm_trim_2 <- dfm_remove(white_dfm_trim, c("activ"))
non_white_dfm_trim_2 <- dfm_remove(non_white_dfm_trim, c("activ"))

# Bar plot for majority White schools, P1
white_freq_50 <- textstat_frequency(white_dfm_trim_2, n = 15)
print(head(white_freq_50, n = 15))
p1 <- plot_top_features(white_freq_50, "Majority White Schools (>50%)")

# Bar plot for majority Non-White schools, P2
non_white_freq_50 <- textstat_frequency(non_white_dfm_trim_2, n = 15)
print(head(non_white_freq_50, n = 15))
p2 <- plot_top_features(
  non_white_freq_50,
  "Majority Non-White Schools (>50%)"
)

# Join figures for visual
fig1 <- (p1 + p2)
print(fig1)

# Save the combined figure; `plot` is passed explicitly instead of relying on
# ggsave()'s default of the last plot displayed.
ggsave("figures/engaged_obedient_figure_2.pdf",
       plot = fig1, width = 11, height = 7)
ggsave("figures/engaged_obedient_figure_2.jpeg",
       plot = fig1, width = 11, height = 7)


# ---- Word Frequency by Larger Majority of White or Non-White Students (70%)

# These files reuse the object names of the 50% inputs. Checking the names
# returned by load() guarantees the 70% section is not silently run on the
# 50% objects still sitting in the workspace.
loaded_70 <- c(load("data/white_70.RData"), load("data/non_white_70.RData"))
stopifnot(all(dfm_names %in% loaded_70))

### Raw Data for Table B.2
print(topfeatures(white_dfm_trim, 30))
print(topfeatures(non_white_dfm_trim, 30))

### Figure B.1
# Remove words in common to isolate unique features
# NOTE(review): "provide" is unstemmed while the neighbouring entries look
# like stems ("activ", "respons", "offic") -- confirm whether "provid" was
# intended. Left unchanged so the published figure is reproduced as-is.
common_features_70 <- c(
  "teacher", "provide", "time", "person", "activ", "grade",
  "guardian", "follow", "respons", "program", "attend", "offic"
)
white_dfm_trim_2 <- dfm_remove(white_dfm_trim, common_features_70)
non_white_dfm_trim_2 <- dfm_remove(non_white_dfm_trim, common_features_70)

# View top features - general
print(topfeatures(white_dfm_trim_2, 15))
print(topfeatures(non_white_dfm_trim_2, 15))

# Bar plot for majority White schools, P1
white_freq_70 <- textstat_frequency(white_dfm_trim_2, n = 15)
print(head(white_freq_70, n = 15))
p1 <- plot_top_features(white_freq_70, "Majority White Schools (>70%)")

# Bar plot for majority Non-White schools, P2
non_white_freq_70 <- textstat_frequency(non_white_dfm_trim_2, n = 15)
print(head(non_white_freq_70, n = 15))
p2 <- plot_top_features(
  non_white_freq_70,
  "Majority Non-White Schools (>70%)"
)

# Place figures side by side and save to "figures" folder
fig2 <- (p1 + p2)
print(fig2)
ggsave("figures/engaged_obedient_B_Figure_1.pdf",
       plot = fig2, width = 11, height = 7)
ggsave("figures/engaged_obedient_B_Figure_1.jpeg",
       plot = fig2, width = 11, height = 7)

